Importation des bibliothèques¶
In [ ]:
import warnings
warnings.simplefilter('ignore')
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import folium
from folium.plugins import HeatMap, MarkerCluster
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score, KFold
from sklearn import metrics
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import Pipeline
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.ensemble import RandomForestRegressor
Chargement des données¶
In [ ]:
# Read the Airbnb training set (a CSV expected in the working directory)
# into a DataFrame, then preview its first five rows as the cell output.
airbnb = pd.read_csv(filepath_or_buffer="airbnb_train.csv")
airbnb.head(n=5)
Out[ ]:
| id | log_price | property_type | room_type | amenities | accommodates | bathrooms | bed_type | cancellation_policy | cleaning_fee | ... | last_review | latitude | longitude | name | neighbourhood | number_of_reviews | review_scores_rating | zipcode | bedrooms | beds | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 5708593 | 4.317488 | House | Private room | {TV,"Wireless Internet",Kitchen,"Free parking ... | 3 | 1.0 | Real Bed | flexible | False | ... | NaN | 33.782712 | -118.134410 | Island style Spa Studio | Long Beach | 0 | NaN | 90804 | 0.0 | 2.0 |
| 1 | 14483613 | 4.007333 | House | Private room | {"Wireless Internet","Air conditioning",Kitche... | 4 | 2.0 | Real Bed | strict | False | ... | 2017-09-17 | 40.705468 | -73.909439 | Beautiful and Simple Room W/2 Beds, 25 Mins to... | Ridgewood | 38 | 86.0 | 11385 | 1.0 | 2.0 |
| 2 | 10412649 | 7.090077 | Apartment | Entire home/apt | {TV,"Wireless Internet","Air conditioning",Kit... | 6 | 2.0 | Real Bed | flexible | False | ... | NaN | 38.917537 | -77.031651 | 2br/2ba luxury condo perfect for infant / toddler | U Street Corridor | 0 | NaN | 20009 | 2.0 | 2.0 |
| 3 | 17954362 | 3.555348 | House | Private room | {TV,"Cable TV",Internet,"Wireless Internet","A... | 1 | 1.0 | Real Bed | flexible | True | ... | 2017-09-29 | 40.736001 | -73.924248 | Manhattan view from Queens. Lovely single room . | Sunnyside | 19 | 96.0 | 11104 | 1.0 | 1.0 |
| 4 | 9969781 | 5.480639 | House | Entire home/apt | {TV,"Cable TV",Internet,"Wireless Internet",Ki... | 4 | 1.0 | Real Bed | moderate | True | ... | 2017-08-28 | 37.744896 | -122.430665 | Zen Captured Noe Valley House | Noe Valley | 15 | 96.0 | 94131 | 2.0 | 2.0 |
5 rows × 28 columns
Exploration¶
In [ ]:
# Distribution of the target variable (log_price): a 30-bin histogram with a
# kernel-density curve overlaid.
plt.figure(figsize=(10, 8))
sns.histplot(airbnb["log_price"], kde=True, bins=30)
plt.title('Distribution des prix')
plt.xlabel('Log Price')
# histplot is called without `stat`, so bar heights are observation counts,
# not densities; label the axis accordingly.
plt.ylabel('Count')
plt.show()
In [ ]:
# Interactive map of every listing: a heat layer plus clustered markers.

# Center the map on the mean latitude/longitude of all listings.
latitude_mean = airbnb['latitude'].mean()
longitude_mean = airbnb['longitude'].mean()
m = folium.Map(location=[latitude_mean, longitude_mean], zoom_start=5)

# Heat layer: one [latitude, longitude] pair per listing, taken straight from
# the two columns instead of building a Series per row with iterrows().
heat_data = airbnb[['latitude', 'longitude']].values.tolist()
HeatMap(heat_data).add_to(m)

# Cluster layer that groups nearby markers.
marker_cluster = MarkerCluster().add_to(m)

# One marker per listing, with the listing name as its popup.
for lat, lon, listing_name in zip(
    airbnb['latitude'], airbnb['longitude'], airbnb['name']
):
    folium.Marker(
        location=[lat, lon],
        # Listing names are free text written by hosts; parse_html=True makes
        # folium escape them instead of injecting them as raw HTML.
        # str() mirrors what folium.Marker does with a non-Popup popup value.
        popup=folium.Popup(str(listing_name), parse_html=True),
    ).add_to(marker_cluster)

# Last expression of the cell: renders the map in the notebook.
m
Out[ ]:
Make this Notebook Trusted to load map: File -> Trust Notebook